In [40]:
!pip install pandas-profiling==2.7.1
Requirement already satisfied: pandas-profiling==2.7.1 in /usr/local/lib/python3.6/dist-packages (2.7.1)
Requirement already satisfied: ipywidgets>=7.5.1 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (7.5.1)
Requirement already satisfied: pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (1.0.5)
Requirement already satisfied: htmlmin>=0.1.12 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.1.12)
Requirement already satisfied: visions[type_image_path]==0.4.1 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.4.1)
Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (1.18.5)
Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (2.23.0)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.16.0)
Requirement already satisfied: matplotlib>=3.2.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (3.2.2)
Requirement already satisfied: jinja2>=2.11.1 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (2.11.2)
Requirement already satisfied: tqdm>=4.43.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (4.49.0)
Requirement already satisfied: phik>=0.9.10 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.10.0)
Requirement already satisfied: confuse>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (1.3.0)
Requirement already satisfied: missingno>=0.4.2 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.4.2)
Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (0.0.6)
Requirement already satisfied: scipy>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (1.4.1)
Requirement already satisfied: astropy>=4.0 in /usr/local/lib/python3.6/dist-packages (from pandas-profiling==2.7.1) (4.0.1.post1)
Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.0.7)
Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.5.1->pandas-profiling==2.7.1) (4.3.3)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.5.1->pandas-profiling==2.7.1) (3.5.1)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.5.1->pandas-profiling==2.7.1) (4.10.1)
Requirement already satisfied: ipython>=4.0.0; python_version >= "3.3" in /usr/local/lib/python3.6/dist-packages (from ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.5.0)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->pandas-profiling==2.7.1) (2018.9)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->pandas-profiling==2.7.1) (2.8.1)
Requirement already satisfied: networkx>=2.4 in /usr/local/lib/python3.6/dist-packages (from visions[type_image_path]==0.4.1->pandas-profiling==2.7.1) (2.5)
Requirement already satisfied: attrs>=19.3.0 in /usr/local/lib/python3.6/dist-packages (from visions[type_image_path]==0.4.1->pandas-profiling==2.7.1) (20.2.0)
Requirement already satisfied: Pillow; extra == "type_image_path" in /usr/local/lib/python3.6/dist-packages (from visions[type_image_path]==0.4.1->pandas-profiling==2.7.1) (7.0.0)
Requirement already satisfied: imagehash; extra == "type_image_path" in /usr/local/lib/python3.6/dist-packages (from visions[type_image_path]==0.4.1->pandas-profiling==2.7.1) (4.1.0)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.23.0->pandas-profiling==2.7.1) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.23.0->pandas-profiling==2.7.1) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.23.0->pandas-profiling==2.7.1) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.23.0->pandas-profiling==2.7.1) (2020.6.20)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.2.0->pandas-profiling==2.7.1) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.2.0->pandas-profiling==2.7.1) (0.10.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.2.0->pandas-profiling==2.7.1) (2.4.7)
Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2>=2.11.1->pandas-profiling==2.7.1) (1.1.1)
Requirement already satisfied: numba>=0.38.1 in /usr/local/lib/python3.6/dist-packages (from phik>=0.9.10->pandas-profiling==2.7.1) (0.48.0)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from confuse>=1.0.0->pandas-profiling==2.7.1) (3.13)
Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from missingno>=0.4.2->pandas-profiling==2.7.1) (0.10.1)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (4.6.3)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (2.6.0)
Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.2.0)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.3.1->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (1.15.0)
Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.3.1->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (4.4.2)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.6/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.3.1)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.3.5)
Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.6/dist-packages (from ipykernel>=4.5.1->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.1.1)
Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (50.3.0)
Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (2.6.1)
Requirement already satisfied: pexpect; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (4.8.0)
Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (1.0.18)
Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.8.1)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.7.5)
Requirement already satisfied: PyWavelets in /usr/local/lib/python3.6/dist-packages (from imagehash; extra == "type_image_path"->visions[type_image_path]==0.4.1->pandas-profiling==2.7.1) (1.1.1)
Requirement already satisfied: llvmlite<0.32.0,>=0.31.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba>=0.38.1->phik>=0.9.10->pandas-profiling==2.7.1) (0.31.0)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (5.6.1)
Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.8.3)
Requirement already satisfied: Send2Trash in /usr/local/lib/python3.6/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (1.5.0)
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (19.0.2)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != "win32"->ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.6.0)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=4.0.0; python_version >= "3.3"->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.2.5)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.8.4)
Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.4.4)
Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (3.2.0)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.3)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (1.4.2)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.6.0)
Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (0.5.1)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.5.1->pandas-profiling==2.7.1) (20.4)
In [41]:
import os, sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import pandas_profiling
import scipy.stats  as stats
from scipy.stats import chi2
from scipy.stats import chi2_contingency

1. Entendimiento del negocio y de los datos

In [42]:
df = pd.read_excel("Datos_parcial2.xlsx", index_col ="ID")
df
Out[42]:
COD_SAL VIA CANTI PBK PNK FOBPES FOBDOL FLETES SEGURO OTROSG PAIS
ID
9087 BOG 4 140.00 0.02660 0.02460 5.038243 1.67604 517.00 0.00 0.00 PAISES BAJOS
3481 BOG 4 142.00 0.06101 0.05636 4.309442 1.43917 45.24 1.09 0.00 ECUADOR
9368 BOG 4 1.00 0.00140 0.00100 0.274905 0.09251 0.00 NaN 0.00 PANAMA
21953 IPI 3 2.00 0.45793 0.39397 38.356037 12.72600 269.54 NaN 89.84 ECUADOR
14218 BUN 1 5328.00 5.91942 5.32800 17.028346 5.62038 0.00 NaN 0.00 BRASIL
... ... ... ... ... ... ... ... ... ... ... ...
17669 BUN 1 610.05 0.61005 0.61005 5.202518 1.72487 0.00 NaN 0.00 PANAMA
19754 BUN 1 89.00 0.01431 0.01289 2.002604 0.66098 0.00 NaN 0.00 BOLIVIA
2260 BOG 4 2.00 0.00061 0.00039 0.011849 0.00400 0.00 NaN 0.00 COSTA RICA
22474 IPI 3 10.00 0.01674 0.01500 0.131346 0.04420 0.00 NaN 0.00 ECUADOR
18587 BUN 1 33.75 0.00397 0.00388 0.063958 0.02111 0.25 NaN 0.00 PERU

13926 rows × 11 columns

In [43]:
pandas_profiling.ProfileReport(df)



Out[43]:

Describa el resultado del perfilamiento de datos e indique el tipo de cada variable.

Revisando el perfilamiento de datos se puede evidenciar que el 1.4% de las filas son duplicadas, dadas las variables considero que se deberían remover pues es bastante improbable que se presenten tales coincidencias dadas las unidades de las variables como Peso bruto y Peso neto de la mercancía, columnas como OTROSG y SEGURO cuentan con registros nulos que, de acuerdo al diccionario y la frecuencia de los valores presentes en estas variables puede deberse a valores no marcados como 0. Asimismo, se encontró que variables como FLETES y las previamente mencionadas SEGURO y OTROSG cuentan con gran cantidad de valores 0 superior al 70%

In [44]:
for columna in df.columns:
  print(columna)
  print(df[columna].unique(), len(df[columna].unique()))
COD_SAL
['BOG' 'IPI' 'BUN' 'CTG' 'CLO' 'MDE' 'SMR' 'BAQ' 'RCH' 'BGA' 'PEI' 'TRB'
 'CUC' 'MAI' 'ADZ'] 15
VIA
[4 3 1 7] 4
CANTI
[140.   142.     1.   ... 104.1  610.05  33.75] 5096
PBK
[0.0266  0.06101 0.0014  ... 0.46237 0.61005 0.01431] 9250
PNK
[0.0246  0.05636 0.001   ... 0.30018 0.391   0.61005] 7459
FOBPES
[5.03824328 4.30944201 0.27490549 ... 0.01184856 0.13134604 0.06395802] 13266
FOBDOL
[1.67604 1.43917 0.09251 ... 1.72487 0.66098 0.02111] 11473
FLETES
[ 517.     45.24    0.   ...   14.3  1024.    129.99] 3044
SEGURO
[0.00000e+00 1.09000e+00         nan 2.79200e+01 5.90000e-01 3.70000e+00
 8.81000e+01 3.00000e-02 6.00000e-02 4.80000e-01 1.50000e-01 2.30000e+01
 1.80000e+00 3.17400e+01 8.16000e+00 4.60000e-01 2.74440e+02 3.00000e+01
 4.13300e+01 4.50000e-01 5.00000e+01 5.60000e-01 7.60000e-01 3.15000e+00
 9.56000e+00 1.53000e+00 8.44000e+00 1.40000e+01 2.94000e+00 1.42000e+00
 7.40000e-01 2.65000e+00 9.14600e+01 4.03000e+00 9.40000e-01 3.90000e-01
 7.00000e-02 1.12000e+00 7.71000e+00 1.13000e+00 8.95000e+00 4.90000e-01
 7.81000e+00 1.35000e+00 4.86000e+00 2.70000e-01 2.23000e+00 3.20000e-01
 1.70000e-01 6.66500e+01 3.40000e-01 1.38300e+01 1.50000e+00 5.00000e-02
 8.70000e-01 7.10000e-01 6.10000e-01 2.00000e-02 4.67100e+01 1.30000e-01
 2.09000e+00 3.70000e-01 4.51000e+00 3.30000e-01 2.70000e+00 6.70000e+00
 7.50000e+01 5.11000e+00 1.38000e+00 9.80000e-01 4.04000e+00 2.26000e+00
 2.75400e+01 9.60000e-01 2.01000e+00 2.06000e+00 3.30900e+01 1.20000e-01
 1.80000e-01 2.00000e-01 7.20000e-01 5.70000e-01 1.00000e-02 8.80000e-01
 8.09000e+00 6.90000e-01 2.60000e-01 6.00000e+01 3.18000e+00 2.10000e-01
 3.76700e+01 9.00000e-02 1.08000e+00 2.80000e-01 2.07900e+01 9.20000e+00
 2.67000e+00 9.30000e-01 2.14000e+00 1.10000e-01 4.00600e+01 1.95000e+00
 2.53000e+00 1.45200e+01 8.80000e+00 4.00000e-01 3.81000e+00 5.40000e-01
 5.80000e-01 5.00000e+00 1.01100e+01 7.00000e+01 3.60000e+00 1.00000e-01
 2.30000e-01 9.33000e+00 5.20000e-01 2.68000e+01 4.78000e+00 6.40000e-01
 1.38200e+01 2.50000e+01 3.09000e+00 2.80000e+01 4.59000e+00 3.48000e+00
 3.88000e+00 2.13000e+00 3.96000e+00 8.60000e-01 3.92000e+00 2.87000e+00
 7.80000e-01 1.40000e-01 4.82000e+00 7.70000e-01 6.83000e+00 2.07100e+01
 8.20000e-01 2.32400e+01 6.10000e+00 6.82000e+00 1.01600e+01 7.20000e+01
 4.39000e+00 6.00000e-01 1.00000e+00 2.82000e+00 8.40000e-01 4.28000e+00
 1.22000e+00 5.16000e+00 2.14200e+01 1.60000e-01 1.63000e+02 3.41000e+00
 2.20000e-01 5.08000e+00 2.90000e-01 3.52000e+00 2.88000e+00 1.05700e+01
 5.03000e+00 2.61000e+01 4.00000e-02 4.20000e-01 1.10000e+00 3.50000e-01
 3.51000e+00 5.17000e+00 2.00000e+00 4.08000e+00 1.20000e+00 1.55000e+00
 4.80000e+01 8.00000e-02 2.22000e+00 1.48000e+00 7.54000e+00 9.84000e+00
 1.15000e+00 6.30000e-01 8.87000e+00 4.44000e+01 8.16900e+01 1.23900e+01
 3.80000e+00 4.43000e+00 4.50000e+00 8.56900e+01 8.00000e+01 1.75000e+00
 5.50000e+01 2.27000e+00 3.84000e+00 3.10000e-01 6.89000e+00 3.44100e+01
 3.80000e-01 8.00000e-01 1.11000e+00 6.29500e+01 2.78000e+00 8.90000e+00
 3.13300e+01 1.77000e+00 1.05900e+01 1.19100e+02 1.16000e+00 5.32000e+00
 1.09500e+01 1.79000e+00 3.04000e+00 2.63000e+01 1.04000e+00 3.20000e+00
 1.71000e+00 2.18000e+00 3.08000e+00 4.28000e+02 1.40000e+00 1.70000e+00
 1.25000e+00 2.33100e+01 2.15000e+00 1.36000e+00 9.92000e+00 3.75000e+00
 1.43000e+00 8.40000e+01 1.37300e+01 7.00000e-01 5.66000e+00 1.90000e-01
 5.66800e+01 7.29000e+01 3.37000e+00 8.90000e-01 6.50000e+00 1.86900e+01
 6.68000e+00 6.70000e-01 3.00000e-01 3.72000e+00 9.00000e-01 1.50300e+01
 2.30000e+00 2.80000e+00 6.62800e+01 1.95000e+01 3.79000e+00 9.50000e+01
 4.31000e+00 1.17000e+00 1.22500e+01 1.74000e+00 1.91000e+00 1.04000e+01
 2.05300e+01 1.51000e+00 4.48000e+00 4.59990e+02 1.02000e+00 6.61000e+00
 1.10200e+01 2.40800e+01 3.25000e+00 1.68000e+00 3.60000e-01 5.28000e+00
 3.24000e+01 3.13000e+00 9.50000e-01 8.30000e-01 4.26000e+00 1.59000e+00
 7.86000e+00 1.13945e+03 1.68000e+01 2.42000e+00 8.66000e+00 1.59100e+01
 1.32000e+00 7.50000e-01 3.67900e+01 1.00000e+01 1.00000e+02 3.46000e+00
 1.19000e+00 6.88000e+00 9.10000e-01 2.08000e+00 3.82000e+00 7.90000e-01
 1.44000e+00 2.40000e+00 1.11500e+01 9.74000e+00 2.68000e+00 2.50000e-01
 6.38000e+00 4.42800e+01 3.61100e+01 3.94900e+01 2.58000e+00 2.32000e+00
 1.08000e+01 6.75000e+00 4.40000e-01 9.20000e-01 3.86000e+00 1.94000e+00
 1.21600e+01 3.14600e+01 1.02500e+01 8.50000e-01 2.49800e+01 1.15000e+01
 5.53000e+00 4.32000e+00 1.62000e+00 3.02000e+00 4.50900e+01 1.30000e+00
 3.16000e+00 2.51000e+00 1.76000e+00 3.65600e+01 7.89000e+01 2.99100e+01
 2.62000e+00 3.07000e+00 1.18000e+00 2.99000e+00 1.06000e+00 5.50300e+01
 1.04000e+02 2.24400e+01 4.08300e+01 2.85000e+00 1.05000e+00 1.71300e+01
 3.24400e+01 2.83000e+00 6.58000e+00 2.40000e-01 2.60000e+01 1.70200e+01
 5.37000e+00 7.02000e+00 3.24000e+00 6.47900e+01 6.90000e+00 1.87000e+00
 1.24000e+00 2.55000e+00 6.42000e+00 9.42000e+00 3.98000e+00 4.89700e+01
 3.49000e+00 1.67000e+00 5.90000e+01 5.46700e+01 8.10000e-01 5.55400e+01
 6.48000e+00 5.77000e+00 2.07000e+00 1.87200e+01 2.43000e+00 5.81000e+00
 8.11700e+01 6.21000e+00 9.58000e+00 4.30000e-01 5.92700e+01 4.89000e+00
 2.34000e+00 6.69000e+00 8.90000e+01 1.28000e+00 3.29000e+00 6.30000e+00
 2.78600e+01 5.30000e+00 5.94000e+00 2.46300e+01 7.78000e+01 7.87000e+00
 6.00000e+00 1.51200e+01 1.23500e+01 4.25000e+00 2.49000e+00 3.50000e+01
 6.50000e-01 2.64000e+00 1.44010e+02 4.99000e+00 2.73000e+00 3.57000e+00
 6.13000e+00 1.97000e+00 2.68700e+01 1.81800e+01 3.03000e+00 6.08000e+00
 1.50000e+01 2.87700e+01 4.74000e+00 1.20400e+01 6.06400e+01 3.32100e+01
 6.20000e-01 1.07000e+00 5.08800e+01 9.90000e-01 2.95000e+00 2.12000e+00
 2.56000e+00 2.50000e+00 4.37000e+00 1.57000e+00 4.15100e+01 6.72100e+01
 1.01200e+01 1.62650e+02 1.91300e+01 5.95000e+00 9.37000e+00 2.36000e+00
 2.48000e+00 1.45000e+00 1.47000e+00 4.02000e+00 3.70700e+01 8.00000e+00
 6.60000e-01 4.61000e+00 5.10000e-01 8.39000e+00 5.61000e+00 5.00000e-01
 1.60000e+00 2.23500e+01 4.35000e+00 2.03000e+00 6.49100e+01 6.62000e+00
 7.37950e+02 4.00000e+00 4.34200e+01 2.70000e+02 6.80000e-01 4.40000e+01
 5.49000e+00 6.73000e+00 9.21000e+00 1.63700e+01 7.96200e+01 2.51900e+01
 1.24200e+01 2.21000e+00 3.56000e+00 5.31000e+00 8.30000e+01 6.85000e+00
 5.30000e-01 7.04000e+00 1.78100e+01 1.72300e+01 1.39400e+01 1.86000e+00
 6.16000e+00 5.83300e+01 1.63000e+00 3.21000e+00 5.26000e+00 9.46900e+01
 1.41000e+00 5.23000e+00 5.16800e+01 3.16000e+01 3.54000e+00 1.72000e+01
 9.78000e+00 1.26600e+01 8.44500e+01 7.37000e+00 1.10000e+01 9.15000e+00
 2.10200e+01 1.70400e+01 8.54000e+00 1.20200e+01 2.20000e+02 2.00000e+01
 2.30900e+01 6.56700e+01 1.35900e+01 2.02000e+00 1.51500e+01 2.26900e+01
 4.20000e+01 2.41000e+00 8.08000e+00 1.92000e+00 9.52000e+01 4.10000e-01
 7.30000e-01 1.03000e+00 2.45000e+00 2.97000e+00 1.88000e+00 8.07500e+01
 1.73000e+00 5.15000e+00 1.07400e+01 1.23000e+00 2.36000e+01 1.27500e+01
 4.23000e+00 3.68000e+00 6.52000e+00 1.69000e+00 8.31000e+00 1.29800e+01
 2.86000e+01 7.05000e+00 1.54000e+00 3.48500e+01 4.53400e+01 5.39000e+00
 6.27000e+00 2.10000e+00 6.39000e+01 5.15900e+01 1.90000e+00 5.98000e+00
 6.77000e+00 1.33000e+00 9.70000e-01 1.84000e+00 4.90000e+01 7.99000e+00
 5.62000e+00 1.02200e+02 1.39000e+00 7.76000e+00 2.47000e+00 3.17900e+01
 1.66000e+00 7.85000e+00 4.42400e+01 4.18000e+00 9.39000e+00 2.12500e+01
 1.27000e+00 1.65000e+00 1.03100e+01 2.50500e+01 9.62000e+00 1.96000e+00
 3.06000e+00 4.46000e+00 2.89000e+00 2.70800e+01 2.16000e+00 4.49700e+01
 7.65000e+00 5.00100e+01 5.14000e+00 1.56300e+01 9.00000e+01 5.75000e+00
 1.20000e+01 2.44000e+00 4.70000e-01 9.99000e+00 1.99700e+01 1.61000e+00
 7.09000e+00 4.88000e+00 1.68700e+01 1.72000e+00 2.40000e+01 4.09700e+01
 1.21000e+00 1.74800e+01 2.53100e+01 9.10900e+01 7.57000e+00 5.72000e+00
 1.37000e+00 3.63000e+00 6.20000e+01 1.52000e+00 1.60900e+01 1.94560e+02
 3.00000e+00 1.32700e+01 1.36300e+01 4.56900e+01 5.80000e+00 1.85000e+00
 4.44000e+00 1.93000e+00 8.73000e+00 2.73200e+01 4.63400e+01 1.88000e+01
 2.75000e+00 7.92700e+01 3.26600e+01 3.58000e+00 1.87700e+01 3.40000e+00
 1.30910e+02 3.99000e+00 1.18100e+01 2.04000e+00 2.82800e+01 6.27600e+01
 1.82000e+00 5.91700e+01 1.83000e+00 5.22000e+00 1.26100e+01 2.32600e+01
 9.20900e+01 2.02300e+01 4.13000e+00 6.53000e+00 1.58300e+01 3.91900e+01
 3.64000e+00 5.96000e+00 2.69000e+00 4.87000e+00 1.46850e+02 1.26000e+00
 5.40000e+00 4.54000e+00 9.17000e+00 8.92000e+00 1.33400e+01 5.10000e+00
 4.67000e+00 4.76000e+00 1.92600e+01 4.46600e+01 4.49000e+00 5.43000e+00
 2.98000e+00 1.64400e+01 2.39000e+00 5.10000e+01 8.34200e+01 6.14900e+01
 6.43300e+01 1.14800e+01 1.64350e+02 3.77000e+00 1.65500e+01 7.36000e+00
 4.60000e+00 1.04900e+01 8.51000e+00 1.35100e+01 2.12000e+01 1.49000e+00
 6.36900e+01 3.34000e+00 7.26000e+00 1.23300e+01 7.27000e+00 2.57700e+01
 5.18000e+00 6.02000e+00 2.90200e+01 1.75100e+01 5.92000e+00 5.24800e+01
 2.13200e+01 5.86700e+01 4.04000e+01 2.66700e+01 1.39200e+01 1.46000e+00
 2.61300e+01 1.47000e+02 1.52500e+01 4.70000e+00 2.80200e+01 2.66510e+02
 6.12000e+00 1.31000e+00 5.80800e+01 3.55300e+01 1.24990e+02 4.25000e+01
 2.20700e+01 1.20700e+01 5.50000e-01 4.41000e+00 3.28000e+00 1.66000e+01
 4.17000e+00 9.10000e+00 1.56000e+00 1.18000e+01 3.45000e+00 2.38900e+01
 1.65850e+02 9.98000e+00 1.48400e+01 4.42300e+01 4.41700e+01 2.17000e+00
 1.17600e+01 2.63000e+00 9.13800e+01 3.78000e+00 6.67000e+00 1.47400e+01
 1.52000e+01 2.84000e+00 2.59700e+01 1.04700e+01 3.06500e+01 1.78000e+00
 4.30000e+00 1.73700e+01 2.72000e+00 3.52500e+01 7.84600e+01 2.50000e+02
 1.29000e+00 2.60870e+02 2.04900e+01 7.51500e+01 3.66600e+01 2.88700e+01
 1.72200e+01 3.65000e+00 3.47000e+01 4.34000e+00 1.17100e+01 5.13200e+01
 1.02700e+01 2.34040e+02 4.62100e+01 5.60400e+01 9.12500e+01 6.31000e+00
 4.62000e+00 4.03200e+01 1.89000e+00 4.77000e+00 3.90300e+01 2.08800e+01
 2.96650e+02 7.72800e+01 3.80000e+01 3.92400e+01 8.48000e+00 5.59100e+01
 6.70700e+01 1.27300e+01 1.53100e+01 6.91000e+01 1.62700e+01 4.64000e+00
 3.39000e+00 2.00000e+02 3.02300e+01 7.77000e+00 3.22000e+00 5.97700e+01
 5.12000e+00 2.46000e+00 4.25200e+01 6.40000e+00 2.52000e+00 2.92000e+00
 1.50000e+02 1.04500e+01 2.77400e+01 3.66200e+01 6.99000e+00 1.80200e+01
 1.51900e+02 4.43600e+01 4.50000e+01 5.35300e+01 9.00000e+00 5.44000e+00
 8.07900e+01 2.25000e+00 4.47000e+00 2.33000e+00 1.29500e+01 1.02200e+01
 4.57600e+01 8.11300e+01 1.23400e+01 1.24950e+02 1.99000e+00 1.08600e+01
 2.60000e+00 4.16000e+00 1.21700e+01 8.69000e+00 4.57000e+00 2.19000e+01
 8.13900e+01 2.58200e+01 2.90000e+00 1.03800e+01 7.14900e+01 9.99200e+01
 6.01000e+00 2.19350e+02 4.10000e+00 3.52400e+01 1.58000e+00 2.02400e+01
 6.25000e+00 7.61000e+01 1.05400e+01 4.12000e+00 1.15200e+01 2.86900e+01
 1.00900e+01 3.30000e+00 3.21000e+01 8.50000e+00 2.76000e+00 4.14100e+01
 2.50300e+01 4.01000e+00 1.18400e+01 2.24000e+00 4.75000e+00 2.53000e+01
 6.09000e+00 3.73000e+00 4.90000e+00 1.62500e+01 4.06000e+00 6.39000e+00
 2.56100e+01 2.70000e+01 9.27000e+00 1.63100e+01 8.99900e+01 1.67300e+01
 1.03000e+01 3.43000e+00 3.55000e+00 8.19000e+00 2.21140e+02 1.16200e+01
 1.14000e+00 2.71000e+00 3.04700e+01 1.84000e+01 2.98800e+01 1.08400e+01
 5.69400e+01 7.83000e+00] 872
OTROSG
[0.000000e+00 8.984000e+01          nan 2.115800e+02 2.015000e+02
 8.517300e+02 6.510000e+00 9.470000e+00 6.500000e+02 4.178000e+01
 6.150000e+00 6.723000e+01 1.158000e+01 8.730000e+00 1.319000e+01
 3.500000e+00 1.800000e+02 1.037900e+03 4.191800e+02 5.036700e+02
 1.600000e+02 1.570000e+00 9.109000e+01 7.000000e-01 5.750500e+02
 2.330000e+02 2.800000e+02 4.830000e+00 4.006500e+02 4.945000e+01
 1.178000e+01 6.960000e+02 2.139000e+01 3.890000e+00 1.167000e+01
 3.216000e+01 3.450000e+01 1.626000e+02 3.937700e+02 5.733000e+01
 2.404000e+01 6.860000e+00 7.600000e-01 8.000000e-02 1.983300e+02
 2.500000e+02 1.052000e+01 1.314900e+02 7.383100e+02 7.800000e+02
 2.056000e+01 4.150000e+02 1.500000e+02 6.850000e+00 9.155000e+01
 2.043210e+03 1.823500e+02 7.700000e+02 1.309000e+01 6.000000e+01
 4.067000e+01 5.300000e+02 1.891700e+02 8.662000e+01 2.003000e+01
 1.722180e+03 6.507000e+01 2.301000e+01 4.450000e+00 1.500000e+00
 8.650000e+00 9.673000e+01 2.198600e+02 6.100000e+02 1.242000e+01
 8.590000e+01 3.429000e+01 6.310000e+00 7.794000e+01 1.440000e+00
 2.265000e+01 5.000000e+00 5.496000e+01 1.101000e+01 1.968300e+02
 7.930000e+00 3.760000e+00 3.400000e-01 1.520000e+01 4.628000e+01
 4.790000e+00 1.247000e+01 2.083000e+01 8.280000e+00 3.307000e+01
 4.470000e+02 1.800000e+01 9.193000e+02 2.961500e+02 6.206000e+01
 1.300000e-01 2.787000e+01 1.627230e+03 1.071000e+01 1.534000e+02
 3.076300e+02 4.770000e+00 4.808000e+01 3.300000e-01 8.390000e+00
 1.840000e+02 6.757200e+02 9.100000e-01 8.810000e+00 2.330000e+00
 1.511000e+01 3.658000e+01 2.580000e+00 6.122000e+01 4.850000e+00
 4.664000e+02 6.415600e+02 1.091000e+01 8.745000e+01 3.440000e+00
 1.280000e+00 6.197000e+01 1.483000e+03 5.720000e+00 5.300000e-01
 2.000000e-02 4.243800e+02 7.219000e+01 2.087000e+01 7.383000e+01
 3.007000e+01 8.877000e+01 4.327000e+01 6.062000e+01 2.470000e+00
 1.753900e+02 1.295000e+01 4.545000e+01 4.439400e+02 4.851700e+02
 2.530000e+00 1.625000e+01 1.341000e+01 6.300000e+01 7.400000e-01
 5.299000e+01 2.873000e+02 3.250000e+00 9.038000e+01 1.550000e+00
 2.000000e+02 3.415000e+01 2.920000e+00 2.485000e+01 1.516980e+03
 9.317000e+01 5.590000e+00 1.813000e+01 3.097000e+01 4.600000e+02
 1.163300e+02 4.619300e+02 5.799780e+03 1.590000e+00 1.442700e+02
 5.910000e+00 2.291500e+02 3.600000e-01 1.403000e+02 1.535100e+02
 3.817100e+02 5.143900e+02 1.396500e+02 3.400000e+02 2.798300e+02
 9.460000e+00 2.540000e+00 7.000000e+02 3.633100e+02 5.373000e+01
 1.184400e+02 1.750000e+00 7.100000e-01 9.018400e+02 2.120000e+00
 7.274000e+01 5.596400e+02 9.918000e+02 2.576000e+01 6.200000e-01
 1.655000e+01 8.880000e+00 7.400000e+01 1.963000e+01 1.300300e+02
 3.000000e+02 2.174800e+02 2.860000e+00 2.670000e+01 1.315700e+02
 1.694900e+02 1.904000e+01 2.103000e+01 4.640000e+00 1.591700e+02
 1.923000e+01 5.640000e+00 7.407000e+01 5.859000e+01 1.172600e+02
 7.250000e+01 1.450000e+02 1.846000e+01 5.000000e+01 2.222200e+02
 1.633000e+01 2.086000e+01 3.016200e+02 5.100000e+00 4.420000e+00
 5.201500e+02 9.000000e+02 1.900000e+00 2.900000e+00 2.617100e+02
 1.560000e+02 1.117000e+01 2.542000e+01 7.670000e+00 1.136720e+03
 1.195100e+02 1.065300e+02 3.927900e+02 2.904400e+02 2.086500e+02
 3.724000e+01 1.600000e+01 5.000000e-02 2.750400e+02 1.800000e-01
 4.199000e+01 3.270000e+01 2.780000e+00 8.279000e+01 1.630000e+03
 5.700000e-01 1.070000e+00 1.970000e+00 1.000000e-02 2.350000e+01
 9.760000e+00 6.391000e+01 1.750000e+02 1.168000e+01 2.718200e+02
 2.555600e+02 1.305800e+02 2.110000e+00 7.897000e+01 2.253000e+01
 9.320000e+00 5.513000e+01 1.320000e+00 1.470000e+02 3.430000e+01
 2.113100e+02 3.526000e+01 8.200000e+02 1.020660e+03 2.160000e+01
 6.301000e+02 3.200000e-01 3.125000e+01 3.232000e+01 2.399000e+01
 2.730000e+00 7.281000e+01 6.570000e+00 9.700000e+02 6.693600e+02
 1.350000e+02 7.945000e+01 2.771800e+02 1.160050e+03 7.639000e+01
 1.251000e+01 5.460000e+00 1.200000e-01 9.000000e-01 1.241200e+02
 4.621000e+01 5.950000e+02 1.210600e+02 2.329500e+02 7.371000e+01
 8.004000e+01 6.500000e+01 2.230000e+01 1.900000e+02 1.520000e+00
 2.917100e+02 4.400000e+01 4.500000e+02 3.100000e-01 1.181450e+03
 2.178400e+02 4.596000e+01 2.425500e+02 3.700000e-01 4.114900e+02
 1.850000e+02 4.257500e+02 1.175254e+04 5.330000e+00 1.400000e+00
 1.588300e+02 1.736000e+02 2.190000e+00 1.200000e+02 4.773000e+01
 1.323700e+02 1.068000e+01 2.321700e+02 1.189000e+02 3.108000e+01
 3.728000e+01 2.050000e+00 7.209000e+01 1.972000e+01 6.492000e+01
 1.221100e+02 3.744000e+01 6.510000e+01 1.406100e+02 3.640300e+02
 4.090000e+00 1.034300e+02 1.630000e+00 1.973000e+01 2.491000e+01
 1.806700e+02 1.000000e+03 3.107500e+02 3.790900e+02 3.095500e+02
 5.670000e+00 8.654100e+02 8.286000e+01 1.111800e+02 1.274000e+02
 1.050000e+03 1.520700e+02 3.752000e+01 2.074990e+03 1.771000e+01
 8.440000e+00 2.090000e+00 3.400000e+00 1.294510e+03 4.265000e+01
 1.837000e+01 2.339000e+01 4.267000e+01 8.319600e+02 1.000000e+00
 6.226200e+02 1.547000e+02 4.250000e+00 9.713000e+01 1.743000e+01
 4.710000e+01 2.560000e+00 2.415600e+02 3.375400e+02 6.074000e+01
 2.368000e+02 1.856750e+03 4.421000e+01 1.103000e+01 1.600000e-01
 9.029000e+01 1.081000e+01 6.693000e+01 1.406300e+02 2.479000e+01
 8.000000e+01 7.000000e-02 1.152000e+01 1.452000e+02 5.198000e+01
 6.600000e-01 4.900000e+00 9.260000e+00 1.012000e+01 2.700000e-01
 2.721000e+01 3.422400e+02 2.614000e+01 3.122000e+01 8.573000e+01
 1.815000e+01 4.530000e+00 5.480000e+00 1.250200e+02 1.851100e+02
 4.909000e+01 8.332000e+01 2.554000e+01 3.933000e+01 3.110000e+00
 2.495000e+01 6.405000e+01 1.501000e+01 9.142000e+01 9.000000e+01
 8.170000e+00 1.017000e+01 9.788000e+01 2.900000e+01 7.194000e+01
 5.613000e+01 6.300000e+02 3.055000e+01 2.430000e+02 1.518100e+02
 1.405000e+03 1.360000e+02 2.280000e+00 1.360000e+00 1.014000e+03
 5.165100e+02 4.365000e+01 1.650000e+03 2.659600e+02 1.869000e+01
 1.024000e+01 5.081000e+01 3.380000e+00 3.416700e+02 3.149000e+01
 1.085000e+01 1.088000e+03 5.960000e+00 3.200000e+01 4.352000e+02
 3.829100e+02 3.510000e+00 8.873300e+02 1.946000e+01 6.990000e+00
 1.775000e+01 1.933800e+02 2.190000e+01 1.992000e+01 1.359000e+01
 3.810000e+00 5.162600e+02 6.979000e+01 1.000000e-01 1.703000e+01
 1.047600e+02 6.087000e+01 8.556000e+01 1.019300e+02 6.000000e-02
 1.976800e+02 5.800000e-01 2.970100e+02 7.990000e+00 3.000000e+01
 9.217000e+01 1.240600e+02 2.750000e+01 4.855000e+01 8.585000e+01
 1.068000e+03 3.600000e+02 7.180000e+00 5.440000e+00 4.078000e+01
 2.762800e+02 2.880300e+02 1.572000e+01 2.266800e+02 3.983000e+02
 1.630850e+03 1.550000e+02 9.201000e+01 2.070000e+00 5.400000e+00
 3.281000e+01 2.742000e+01 1.481900e+02 5.000000e-01 4.000000e+00
 3.170000e+00 5.140000e+00] 502
PAIS
['PAISES BAJOS' 'ECUADOR' 'PANAMA' 'BRASIL' 'USA' 'MEXICO' 'EL SALVADOR'
 'COSTA RICA' 'CURAZAO' 'ESPANA' 'PERU' 'GUATEMALA' 'HONDURAS' 'CHILE'
 'REP DOMINICANA' 'BOLIVIA' 'CANADA' 'VENEZUELA' 'REINO UNIDO'
 'PUERTO RICO'] 20

Vemos que para variables como _CODSAL y VIA hay menos valores que los posibles mencionados en el diccionario y que sin embargo son posibles. También vemos que para la variable PAIS hay más valores posibles que los mencionados en el diccionario por lo que es mejor removerlos pues no son esperados. También es posible ver que el tipo de la variable CANTI no corresponde al esperado pues debería ser una variable discreta y además presenta valores ireales que serán especificados más adelante.

In [45]:
df.dtypes
Out[45]:
COD_SAL     object
VIA          int64
CANTI      float64
PBK        float64
PNK        float64
FOBPES     float64
FOBDOL     float64
FLETES     float64
SEGURO     float64
OTROSG     float64
PAIS        object
dtype: object

Describa el entendimiento que tiene del negocio a partir de los datos suministrados.

Los datos están relacionados con exportaciones realizadas en Colombia en el mes de Diciembre en un año específico. En el dataset hay 9 variables numéricas y 3 categóricas que describen una exportación.

  • ID ID de la exportación
  • COD_SAL Lugar de donde sale la mercancía.
  • VIA Vía de transporte.
  • CANTI Número de unidades del producto exportado
  • PBK Peso bruto de la mercancía (En Kilos)
  • PNK Peso neto de la mercancía (En Kilos)
  • FOBDOL Valor total de la mercancía (en miles de dólares)
  • FOBPES Valor total de la mercancía (en millones de pesos)
  • FLETES Valor fletes de la mercancía (En pesos)
  • SEGURO Valor asociado a seguros de la mercancía
  • OTROSG Otros gastos de la mercancía
  • PAIS País destino de la mercancía exportada

2. Análisis y propuestas accionables.

Hipótesis de interés.

  1. La ubicación del lugar de dónde sale la mercancía está relacionada con el país de destino, entre más adentro geográficamente se encuentre el lugar de salida, más cerca se encuentra el país de destino.
  2. El peso bruto está relacionado de manera directa con el valor total de la mercancía en millones de pesos, entre más pesada sea la mercancía, más va a costar.
  3. El valor de otros gastos de la mercancía está relacionado con la vía de transporte usada para la exportación, entre menos común sea mayor va a ser el valor de los otros gastos.
  4. El número de unidades del producto exportado está relacionado con la ubicación del lugar de dónde sale la mercancía, entre más costera sea la ubicación del lugar de dónde sale la mercancía más unidades se van a exportar

Hipótesis 1

La ubicación del lugar de dónde sale la mercancía está relacionada con el país de destino, entre más adentro geográficamente se encuentre el lugar de salida, más cerca se encuentra el país de destino.

In [46]:
df2 = df.groupby(['PAIS', 'COD_SAL'])['PAIS'].count().unstack('COD_SAL').fillna(0)
ax = df2.plot(kind='bar', stacked=True,figsize=(15,15))
NUM_COLORS = 20

cm = plt.get_cmap('gist_rainbow')
ax.set_title("Cantidad de exportaciones por país de destino y región de salida")
Out[46]:
Text(0.5, 1.0, 'Cantidad de exportaciones por país de destino y región de salida')

Podemos ver que hay regiones como CTG o BUN que participan activamente en algunas exportaciones mientras que en otras no lo que podría ayudarnos a dar una idea de la validez de la hipótesis

Hipótesis 2

El peso bruto está relacionado de manera directa con el valor total de la mercancía en millones de pesos, entre más pesada sea la mercancía, más va a costar.

In [47]:
df.plot.scatter(x='FOBPES', y='PBK')
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ffade8980f0>

Acá podemos ver que hay una especie de relación lineal entre el peso bruto y el valor total de la mercancía en millones de pesos

Hipótesis 3.

El valor de otros gastos de la mercancía está relacionado con la vía de transporte usada para la exportación, entre menos común sea mayor va a ser el valor de los otros gastos.

In [48]:
df[["OTROSG","VIA"]].plot(kind="bar")
sns.violinplot("VIA","OTROSG",data=df)
Exception ignored in: <function TransformNode.set_children.<locals>.<lambda> at 0x7ffadf56bae8>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/transforms.py", line 177, in <lambda>
    ref = weakref.ref(self, lambda ref, sid=id(self),
KeyboardInterrupt
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-48-efeeba2ad7ee> in <module>()
----> 1 df[["OTROSG","VIA"]].plot(kind="bar")

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __call__(self, *args, **kwargs)
    845                     data.columns = label_name
    846 
--> 847         return plot_backend.plot(data, kind=kind, **kwargs)
    848 
    849     __call__.__doc__ = __doc__

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_matplotlib/__init__.py in plot(data, kind, **kwargs)
     59             kwargs["ax"] = getattr(ax, "left_ax", ax)
     60     plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 61     plot_obj.generate()
     62     plot_obj.draw()
     63     return plot_obj.result

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_matplotlib/core.py in generate(self)
    261         self._compute_plot_data()
    262         self._setup_subplots()
--> 263         self._make_plot()
    264         self._add_table()
    265         self._make_legend()

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_matplotlib/core.py in _make_plot(self)
   1403                     label=label,
   1404                     log=self.log,
-> 1405                     **kwds,
   1406                 )
   1407             self._add_legend_handle(rect, label, index=i)

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_matplotlib/core.py in _plot(cls, ax, x, y, w, start, log, **kwds)
   1328     @classmethod
   1329     def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
-> 1330         return ax.bar(x, y, w, bottom=start, log=log, **kwds)
   1331 
   1332     @property

/usr/local/lib/python3.6/dist-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
   1563     def inner(ax, *args, data=None, **kwargs):
   1564         if data is None:
-> 1565             return func(ax, *map(sanitize_sequence, args), **kwargs)
   1566 
   1567         bound = new_sig.bind(ax, *args, **kwargs)

/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_axes.py in bar(self, x, height, width, bottom, align, **kwargs)
   2402             elif orientation == 'horizontal':
   2403                 r.sticky_edges.x.append(l)
-> 2404             self.add_patch(r)
   2405             patches.append(r)
   2406 

/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py in add_patch(self, p)
   1916         self._set_artist_props(p)
   1917         if p.get_clip_path() is None:
-> 1918             p.set_clip_path(self.patch)
   1919         self._update_patch_limits(p)
   1920         self.patches.append(p)

/usr/local/lib/python3.6/dist-packages/matplotlib/artist.py in set_clip_path(self, path, transform)
    789         # will be hit at least once.
    790         self.pchanged()
--> 791         self.stale = True
    792 
    793     def get_alpha(self):

/usr/local/lib/python3.6/dist-packages/matplotlib/artist.py in stale(self, val)
    226 
    227         if val and self.stale_callback is not None:
--> 228             self.stale_callback(self, val)
    229 
    230     def get_window_extent(self, renderer):

/usr/local/lib/python3.6/dist-packages/matplotlib/artist.py in _stale_axes_callback(self, val)
     49 def _stale_axes_callback(self, val):
     50     if self.axes:
---> 51         self.axes.stale = val
     52 
     53 

/usr/local/lib/python3.6/dist-packages/matplotlib/artist.py in stale(self, val)
    226 
    227         if val and self.stale_callback is not None:
--> 228             self.stale_callback(self, val)
    229 
    230     def get_window_extent(self, renderer):

/usr/local/lib/python3.6/dist-packages/matplotlib/figure.py in _stale_figure_callback(self, val)
     45 def _stale_figure_callback(self, val):
     46     if self.figure:
---> 47         self.figure.stale = val
     48 
     49 

/usr/local/lib/python3.6/dist-packages/matplotlib/artist.py in stale(self, val)
    222         # draw stack and is not expected to be drawn as part of the normal
    223         # draw loop (when not saving) so do not propagate this change
--> 224         if self.get_animated():
    225             return
    226 

KeyboardInterrupt: 

Hipótesis 4.

El número de unidades del producto exportado está relacionado con la ubicación del lugar de dónde sale la mercancía, entre más costera sea la ubicación del lugar de dónde sale la mercancía más unidades se van a exportar

In [ ]:
ax = df.groupby("COD_SAL")["CANTI"].count().plot(kind="bar")
ax.set_ylabel("CANTIDAD")

Acá podemos ver que hay algunas regiones de salida que tienen una gran cantidad de exportaciones y entre ellas hay presentes varias costeras como Cartagena.

Problemas de calidad de los datos que puedan afectar los análisis propuestos en las hipótesis

La cantidad de valores nulos mencionada previamente podría afectar. Sin embargo, dado el bajo porcentaje su impacto al eliminarlas sería leve por lo que es lo que haré, también sería conveniente validar con un experto en el negocio la razón de estos. Asimismo, los valores duplicados como se mencionó previamente no puede afectar notablemente los análisis propuestos en las hipótesis pues su porcentaje es poco y considero que a pesar de lo anterior y dadas las unidades de las variables las probabilidades de presencia de estas coincidencias son nulas por lo que es mejor eliminarlas. Todas las variables corresponden exactamente a la realidad planteada en el diciconario de datos, a excepción de las variables

  • PAIS que cuenta con 4 registros fuera de los mencionados en el diccionario.
  • PBK que cuenta con valores en 0
  • CANTI que cuenta con valores decimales

Ajustes sobre los datos

Inicialmente se eliminarán las filas que en la variable país contengan a alguno de los países no especificados en el diccionario

También se eliminarán las filas que contengan en la variable PBK el valor de 0 y que en canti cuenten con valores decimales y se cambiará el tipo a Int.

Finalmente se eliminarán las filas duplicadas y las que contengan valores nulos.

In [ ]:
aRemov = ["BRASIL","HONDURAS","VENEZUELA","REINO UNIDO"] 
for a in aRemov:
  df = df[df.PAIS!=a]
print(df.PAIS.unique(),len(df.PAIS.unique()))
In [ ]:
df = df[df["PBK"]>0]
df = df[df["CANTI"]%1==0]
df["CANTI"] = df["CANTI"].astype(int)
In [ ]:
df = df.drop_duplicates()
In [ ]:
df.dropna(axis=0,how="all")

Validación de hipótesis planteadas

Hipótesis 1

La ubicación del lugar de dónde sale la mercancía está relacionada con el país de destino, entre más adentro geográficamente se encuentre el lugar de salida, más cerca se encuentra el país de destino.

In [ ]:
print(df["COD_SAL"].unique())
In [ ]:
def freq_relativFil(col,df):
  Tabla = pd.crosstab(index=df[col],columns=df["COD_SAL"],margins=True)
  cols = list(Tabla.columns)
  cols[-1] = "Total_"+col
  Tabla.columns = cols
  idxs = list(Tabla.index)
  idxs[-1] = "Total_y"
  Tabla.index = idxs
  return Tabla.div(Tabla["Total_"+col], axis=0)

Tabla = freq_relativFil('PAIS',df)
Tabla

ax = (Tabla[df["COD_SAL"].unique()]).plot(kind='bar',figsize=(15,15),width = 0.8,edgecolor=None, stacked=True)
plt.legend(labels=Tabla.columns,fontsize= 14)
plt.title("% de exportaciones a cada destino por región de origen",fontsize= 16)

plt.xticks(fontsize=14, rotation=90)
for spine in plt.gca().spines.values():
    spine.set_visible(False)
plt.yticks([])

# Add this loop to add the annotations
for p in ax.patches:
    width, height = p.get_width(), p.get_height()
    x, y = p.get_xy() 
    ax.annotate('{:.0%}'.format(height), (x, y + height + 0.01))
In [ ]:
def contingencia(col,df):
  Tabla = pd.crosstab(index=df[col],columns=df["COD_SAL"])
  return Tabla

def chiquad(Tabla, prob = 0.95):
  estadistico, p_value, grados_libertad, freq_esperadas = chi2_contingency(Tabla)
  print(f"El estadístico de prueba es {estadistico}, el p_value es {p_value}, los grados de libertad {grados_libertad} y las frecuencias esperadas son:\n {freq_esperadas}")
  print("Interpretando p_value con probabilidad del {}%".format(prob*100))
  alpha = 1.0 - prob
  if p_value <= alpha:
    print('Dependientes (Se rechaza H0)')
  else:
    print('No son dependientes (No se puede rechazar H0)')
  print("Interpretando estadístico de prueba con probabilidad del {0}% y {1} grados de libertad".format(prob*100,grados_libertad))
  # interpret test-statistic
  critical = chi2.ppf(prob, grados_libertad)
  if abs(estadistico) >= critical:
    print('Dependientes (Se rechaza H0)')
  else:
    print('No son dependientes (No se puede rechazar H0)')
Tabla = contingencia("PAIS",df)
Tabla
In [ ]:
chiquad(Tabla)

Las variables COD_SAL y PAIS están relacionadas.

Hipótesis 2

El peso bruto está relacionado de manera directa con el valor total de la mercancía en millones de pesos, entre más pesada sea la mercancía, más va a costar.

In [ ]:
sns.jointplot("FOBPES","PBK",data=df)
In [ ]:
df[["FOBPES","PBK"]].corr(method="pearson")
In [ ]:
df[["FOBPES","PBK"]].corr(method="spearman")

Dado que las variables no están distribuidas normalmente la correlación de Pearson no es la adecuada para determinar qué tan correlacionadas están. Sin embargo está la correlación de Spearman que sirve para determinar y nos indica que hay un 72.8% de correlación entre estas variables

Impacto en la ley de protección de datos

De acuerdo a los fines del DANE no hay restricciones legales con los datos pues no presentan información sensible y se encuentran a disposición del público para su uso. Sin embargo, hay que ser responsable con los resultados que se obtengan y su publicación.